{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "synthetic_features_and_outliers.ipynb",
      "version": "0.3.2",
      "views": {},
      "default_view": {},
      "provenance": [],
      "collapsed_sections": [
        "i5Ul3zf5QYvW",
        "jByCP8hDRZmM",
        "WvgxW0bUSC-c",
        "copyright-notice"
      ]
    }
  },
  "cells": [
    {
      "source": [
        "#### Copyright 2017 Google LLC."
      ],
      "metadata": {
        "colab_type": "text",
        "id": "copyright-notice"
      },
      "cell_type": "markdown"
    },
    {
      "outputs": [],
      "source": [
        "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
        "# you may not use this file except in compliance with the License.\n",
        "# You may obtain a copy of the License at\n",
        "#\n",
        "# https://www.apache.org/licenses/LICENSE-2.0\n",
        "#\n",
        "# Unless required by applicable law or agreed to in writing, software\n",
        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
        "# See the License for the specific language governing permissions and\n",
        "# limitations under the License."
      ],
      "metadata": {
        "colab": {
          "autoexec": {
            "wait_interval": 0,
            "startup": false
          }
        },
        "cellView": "both",
        "colab_type": "code",
        "id": "copyright-notice2"
      },
      "cell_type": "code",
      "execution_count": 0
    },
    {
      "metadata": {
        "id": "4f3CKqFUqL2-",
        "colab_type": "text",
        "slideshow": {
          "slide_type": "slide"
        }
      },
      "cell_type": "markdown",
      "source": [
        " # \u5408\u6210\u7279\u5f81\u548c\u79bb\u7fa4\u503c"
      ]
    },
    {
      "metadata": {
        "id": "jnKgkN5fHbGy",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " **\u5b66\u4e60\u76ee\u6807\uff1a**\n",
        "  * \u521b\u5efa\u4e00\u4e2a\u5408\u6210\u7279\u5f81\uff0c\u5373\u53e6\u5916\u4e24\u4e2a\u7279\u5f81\u7684\u6bd4\u4f8b\n",
        "  * \u5c06\u6b64\u65b0\u7279\u5f81\u7528\u4f5c\u7ebf\u6027\u56de\u5f52\u6a21\u578b\u7684\u8f93\u5165\n",
        "  * \u901a\u8fc7\u8bc6\u522b\u548c\u622a\u53d6\uff08\u79fb\u9664\uff09\u8f93\u5165\u6570\u636e\u4e2d\u7684\u79bb\u7fa4\u503c\u6765\u63d0\u9ad8\u6a21\u578b\u7684\u6709\u6548\u6027"
      ]
    },
    {
      "metadata": {
        "id": "VOpLo5dcHbG0",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " \u6211\u4eec\u6765\u56de\u987e\u4e0b\u4e4b\u524d\u7684\u201c\u4f7f\u7528 TensorFlow \u7684\u57fa\u672c\u6b65\u9aa4\u201d\u7ec3\u4e60\u4e2d\u7684\u6a21\u578b\u3002\n",
        "\n",
        "\u9996\u5148\uff0c\u6211\u4eec\u5c06\u52a0\u5229\u798f\u5c3c\u4e9a\u5dde\u4f4f\u623f\u6570\u636e\u5bfc\u5165 *Pandas* `DataFrame` \u4e2d\uff1a"
      ]
    },
    {
      "metadata": {
        "id": "S8gm6BpqRRuh",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " ## \u8bbe\u7f6e"
      ]
    },
    {
      "metadata": {
        "id": "9D8GgUovHbG0",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "from __future__ import print_function\n",
        "\n",
        "import math\n",
        "\n",
        "from IPython import display\n",
        "from matplotlib import cm\n",
        "from matplotlib import gridspec\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import sklearn.metrics as metrics\n",
        "import tensorflow as tf\n",
        "from tensorflow.python.data import Dataset\n",
        "\n",
        "tf.logging.set_verbosity(tf.logging.ERROR)\n",
        "pd.options.display.max_rows = 10\n",
        "pd.options.display.float_format = '{:.1f}'.format\n",
        "\n",
        "california_housing_dataframe = pd.read_csv(\"https://download.mlcc.google.cn/mledu-datasets/california_housing_train.csv\", sep=\",\")\n",
        "\n",
        "california_housing_dataframe = california_housing_dataframe.reindex(\n",
        "    np.random.permutation(california_housing_dataframe.index))\n",
        "california_housing_dataframe[\"median_house_value\"] /= 1000.0\n",
        "california_housing_dataframe"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "I6kNgrwCO_ms",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " \u63a5\u4e0b\u6765\uff0c\u6211\u4eec\u5c06\u8bbe\u7f6e\u8f93\u5165\u51fd\u6570\uff0c\u5e76\u9488\u5bf9\u6a21\u578b\u8bad\u7ec3\u6765\u5b9a\u4e49\u8be5\u51fd\u6570\uff1a"
      ]
    },
    {
      "metadata": {
        "id": "5RpTJER9XDub",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "def my_input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):\n",
        "    \"\"\"Trains a linear regression model of one feature.\n",
        "  \n",
        "    Args:\n",
        "      features: pandas DataFrame of features\n",
        "      targets: pandas DataFrame of targets\n",
        "      batch_size: Size of batches to be passed to the model\n",
        "      shuffle: True or False. Whether to shuffle the data.\n",
        "      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely\n",
        "    Returns:\n",
        "      Tuple of (features, labels) for next data batch\n",
        "    \"\"\"\n",
        "    \n",
        "    # Convert pandas data into a dict of np arrays.\n",
        "    features = {key:np.array(value) for key,value in dict(features).items()}                                           \n",
        " \n",
        "    # Construct a dataset, and configure batching/repeating.\n",
        "    ds = Dataset.from_tensor_slices((features,targets)) # warning: 2GB limit\n",
        "    ds = ds.batch(batch_size).repeat(num_epochs)\n",
        "    \n",
        "    # Shuffle the data, if specified.\n",
        "    if shuffle:\n",
        "      ds = ds.shuffle(buffer_size=10000)\n",
        "    \n",
        "    # Return the next batch of data.\n",
        "    features, labels = ds.make_one_shot_iterator().get_next()\n",
        "    return features, labels"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "VgQPftrpHbG3",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "def train_model(learning_rate, steps, batch_size, input_feature):\n",
        "  \"\"\"Trains a linear regression model.\n",
        "  \n",
        "  Args:\n",
        "    learning_rate: A `float`, the learning rate.\n",
        "    steps: A non-zero `int`, the total number of training steps. A training step\n",
        "      consists of a forward and backward pass using a single batch.\n",
        "    batch_size: A non-zero `int`, the batch size.\n",
        "    input_feature: A `string` specifying a column from `california_housing_dataframe`\n",
        "      to use as input feature.\n",
        "      \n",
        "  Returns:\n",
        "    A Pandas `DataFrame` containing targets and the corresponding predictions done\n",
        "    after training the model.\n",
        "  \"\"\"\n",
        "  \n",
        "  periods = 10\n",
        "  steps_per_period = steps / periods\n",
        "\n",
        "  my_feature = input_feature\n",
        "  my_feature_data = california_housing_dataframe[[my_feature]].astype('float32')\n",
        "  my_label = \"median_house_value\"\n",
        "  targets = california_housing_dataframe[my_label].astype('float32')\n",
        "\n",
        "  # Create input functions.\n",
        "  training_input_fn = lambda: my_input_fn(my_feature_data, targets, batch_size=batch_size)\n",
        "  predict_training_input_fn = lambda: my_input_fn(my_feature_data, targets, num_epochs=1, shuffle=False)\n",
        "  \n",
        "  # Create feature columns.\n",
        "  feature_columns = [tf.feature_column.numeric_column(my_feature)]\n",
        "    \n",
        "  # Create a linear regressor object.\n",
        "  my_optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)\n",
        "  my_optimizer = tf.contrib.estimator.clip_gradients_by_norm(my_optimizer, 5.0)\n",
        "  linear_regressor = tf.estimator.LinearRegressor(\n",
        "      feature_columns=feature_columns,\n",
        "      optimizer=my_optimizer\n",
        "  )\n",
        "\n",
        "  # Set up to plot the state of our model's line each period.\n",
        "  plt.figure(figsize=(15, 6))\n",
        "  plt.subplot(1, 2, 1)\n",
        "  plt.title(\"Learned Line by Period\")\n",
        "  plt.ylabel(my_label)\n",
        "  plt.xlabel(my_feature)\n",
        "  sample = california_housing_dataframe.sample(n=300)\n",
        "  plt.scatter(sample[my_feature], sample[my_label])\n",
        "  colors = [cm.coolwarm(x) for x in np.linspace(-1, 1, periods)]\n",
        "\n",
        "  # Train the model, but do so inside a loop so that we can periodically assess\n",
        "  # loss metrics.\n",
        "  print(\"Training model...\")\n",
        "  print(\"RMSE (on training data):\")\n",
        "  root_mean_squared_errors = []\n",
        "  for period in range (0, periods):\n",
        "    # Train the model, starting from the prior state.\n",
        "    linear_regressor.train(\n",
        "        input_fn=training_input_fn,\n",
        "        steps=steps_per_period,\n",
        "    )\n",
        "    # Take a break and compute predictions.\n",
        "    predictions = linear_regressor.predict(input_fn=predict_training_input_fn)\n",
        "    predictions = np.array([item['predictions'][0] for item in predictions])\n",
        "    \n",
        "    # Compute loss.\n",
        "    root_mean_squared_error = math.sqrt(\n",
        "      metrics.mean_squared_error(predictions, targets))\n",
        "    # Occasionally print the current loss.\n",
        "    print(\"  period %02d : %0.2f\" % (period, root_mean_squared_error))\n",
        "    # Add the loss metrics from this period to our list.\n",
        "    root_mean_squared_errors.append(root_mean_squared_error)\n",
        "    # Finally, track the weights and biases over time.\n",
        "    # Apply some math to ensure that the data and line are plotted neatly.\n",
        "    y_extents = np.array([0, sample[my_label].max()])\n",
        "    \n",
        "    weight = linear_regressor.get_variable_value('linear/linear_model/%s/weights' % input_feature)[0]\n",
        "    bias = linear_regressor.get_variable_value('linear/linear_model/bias_weights')\n",
        "    \n",
        "    x_extents = (y_extents - bias) / weight\n",
        "    x_extents = np.maximum(np.minimum(x_extents,\n",
        "                                      sample[my_feature].max()),\n",
        "                           sample[my_feature].min())\n",
        "    y_extents = weight * x_extents + bias\n",
        "    plt.plot(x_extents, y_extents, color=colors[period]) \n",
        "  print(\"Model training finished.\")\n",
        "\n",
        "  # Output a graph of loss metrics over periods.\n",
        "  plt.subplot(1, 2, 2)\n",
        "  plt.ylabel('RMSE')\n",
        "  plt.xlabel('Periods')\n",
        "  plt.title(\"Root Mean Squared Error vs. Periods\")\n",
        "  plt.tight_layout()\n",
        "  plt.plot(root_mean_squared_errors)\n",
        "\n",
        "  # Create a table with calibration data.\n",
        "  calibration_data = pd.DataFrame()\n",
        "  calibration_data[\"predictions\"] = pd.Series(predictions)\n",
        "  calibration_data[\"targets\"] = pd.Series(targets)\n",
        "  display.display(calibration_data.describe())\n",
        "\n",
        "  print(\"Final RMSE (on training data): %0.2f\" % root_mean_squared_error)\n",
        "  \n",
        "  return calibration_data"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "FJ6xUNVRm-do",
        "colab_type": "text",
        "slideshow": {
          "slide_type": "slide"
        }
      },
      "cell_type": "markdown",
      "source": [
        " ## \u4efb\u52a1 1\uff1a\u5c1d\u8bd5\u5408\u6210\u7279\u5f81\n",
        "\n",
        "`total_rooms` \u548c `population` \u7279\u5f81\u90fd\u4f1a\u7edf\u8ba1\u6307\u5b9a\u8857\u533a\u7684\u76f8\u5173\u603b\u8ba1\u6570\u636e\u3002\n",
        "\n",
        "\u4f46\u662f\uff0c\u5982\u679c\u4e00\u4e2a\u8857\u533a\u6bd4\u53e6\u4e00\u4e2a\u8857\u533a\u7684\u4eba\u53e3\u66f4\u5bc6\u96c6\uff0c\u4f1a\u600e\u4e48\u6837\uff1f\u6211\u4eec\u53ef\u4ee5\u521b\u5efa\u4e00\u4e2a\u5408\u6210\u7279\u5f81\uff08\u5373 `total_rooms` \u4e0e `population` \u7684\u6bd4\u4f8b\uff09\u6765\u63a2\u7d22\u8857\u533a\u4eba\u53e3\u5bc6\u5ea6\u4e0e\u623f\u5c4b\u4ef7\u503c\u4e2d\u4f4d\u6570\u4e4b\u95f4\u7684\u5173\u7cfb\u3002\n",
        "\n",
        "\u5728\u4ee5\u4e0b\u5355\u5143\u683c\u4e2d\uff0c\u521b\u5efa\u4e00\u4e2a\u540d\u4e3a `rooms_per_person` \u7684\u7279\u5f81\uff0c\u5e76\u5c06\u5176\u7528\u4f5c `train_model()` \u7684 `input_feature`\u3002\n",
        "\n",
        "\u901a\u8fc7\u8c03\u6574\u5b66\u4e60\u901f\u7387\uff0c\u60a8\u4f7f\u7528\u8fd9\u4e00\u7279\u5f81\u53ef\u4ee5\u83b7\u5f97\u7684\u6700\u4f73\u6548\u679c\u662f\u4ec0\u4e48\uff1f\uff08\u6548\u679c\u8d8a\u597d\uff0c\u56de\u5f52\u7ebf\u4e0e\u6570\u636e\u7684\u62df\u5408\u5ea6\u5c31\u8d8a\u9ad8\uff0c\u6700\u7ec8 RMSE \u4e5f\u4f1a\u8d8a\u4f4e\u3002\uff09"
      ]
    },
    {
      "metadata": {
        "id": "isONN2XK32Wo",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " **\u6ce8\u610f**\uff1a\u5728\u4e0b\u9762\u6dfb\u52a0\u4e00\u4e9b\u4ee3\u7801\u5355\u5143\u683c\u53ef\u80fd\u6709\u5e2e\u52a9\uff0c\u8fd9\u6837\u60a8\u5c31\u53ef\u4ee5\u5c1d\u8bd5\u51e0\u79cd\u4e0d\u540c\u7684\u5b66\u4e60\u901f\u7387\u5e76\u6bd4\u8f83\u7ed3\u679c\u3002\u8981\u6dfb\u52a0\u65b0\u7684\u4ee3\u7801\u5355\u5143\u683c\uff0c\u8bf7\u5c06\u5149\u6807\u60ac\u505c\u5728\u8be5\u5355\u5143\u683c\u4e2d\u5fc3\u7684\u6b63\u4e0b\u65b9\uff0c\u7136\u540e\u70b9\u51fb**\u4ee3\u7801**\u3002"
      ]
    },
    {
      "metadata": {
        "id": "5ihcVutnnu1D",
        "colab_type": "code",
        "slideshow": {
          "slide_type": "slide"
        },
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          },
          "test": {
            "output": "ignore",
            "timeout": 600
          }
        },
        "cellView": "both"
      },
      "source": [
        "#\n",
        "# YOUR CODE HERE\n",
        "#\n",
        "california_housing_dataframe[\"rooms_per_person\"] =\n",
        "\n",
        "calibration_data = train_model(\n",
        "    learning_rate=0.00005,\n",
        "    steps=500,\n",
        "    batch_size=5,\n",
        "    input_feature=\"rooms_per_person\"\n",
        ")"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "i5Ul3zf5QYvW",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " ### \u89e3\u51b3\u65b9\u6848\n",
        "\n",
        "\u70b9\u51fb\u4e0b\u65b9\u5373\u53ef\u67e5\u770b\u89e3\u51b3\u65b9\u6848\u3002"
      ]
    },
    {
      "metadata": {
        "id": "Leaz2oYMQcBf",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "california_housing_dataframe[\"rooms_per_person\"] = (\n",
        "    california_housing_dataframe[\"total_rooms\"] / california_housing_dataframe[\"population\"])\n",
        "\n",
        "calibration_data = train_model(\n",
        "    learning_rate=0.05,\n",
        "    steps=500,\n",
        "    batch_size=5,\n",
        "    input_feature=\"rooms_per_person\")"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "ZjQrZ8mcHFiU",
        "colab_type": "text",
        "slideshow": {
          "slide_type": "slide"
        }
      },
      "cell_type": "markdown",
      "source": [
        " ## \u4efb\u52a1 2\uff1a\u8bc6\u522b\u79bb\u7fa4\u503c\n",
        "\n",
        "\u6211\u4eec\u53ef\u4ee5\u901a\u8fc7\u521b\u5efa\u9884\u6d4b\u503c\u4e0e\u76ee\u6807\u503c\u7684\u6563\u70b9\u56fe\u6765\u53ef\u89c6\u5316\u6a21\u578b\u6548\u679c\u3002\u7406\u60f3\u60c5\u51b5\u4e0b\uff0c\u8fd9\u4e9b\u503c\u5c06\u4f4d\u4e8e\u4e00\u6761\u5b8c\u5168\u76f8\u5173\u7684\u5bf9\u89d2\u7ebf\u4e0a\u3002\n",
        "\n",
        "\u4f7f\u7528\u60a8\u5728\u4efb\u52a1 1 \u4e2d\u8bad\u7ec3\u8fc7\u7684\u4eba\u5747\u623f\u95f4\u6570\u6a21\u578b\uff0c\u5e76\u4f7f\u7528 Pyplot \u7684 `scatter()` \u521b\u5efa\u9884\u6d4b\u503c\u4e0e\u76ee\u6807\u503c\u7684\u6563\u70b9\u56fe\u3002\n",
        "\n",
        "\u60a8\u662f\u5426\u770b\u5230\u4efb\u4f55\u5f02\u5e38\u60c5\u51b5\uff1f\u901a\u8fc7\u67e5\u770b `rooms_per_person` \u4e2d\u503c\u7684\u5206\u5e03\u60c5\u51b5\uff0c\u5c06\u8fd9\u4e9b\u5f02\u5e38\u60c5\u51b5\u8ffd\u6eaf\u5230\u6e90\u6570\u636e\u3002"
      ]
    },
    {
      "metadata": {
        "id": "P0BDOec4HbG_",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "# YOUR CODE HERE"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "jByCP8hDRZmM",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " ### \u89e3\u51b3\u65b9\u6848\n",
        "\n",
        "\u70b9\u51fb\u4e0b\u65b9\u5373\u53ef\u67e5\u770b\u89e3\u51b3\u65b9\u6848\u3002"
      ]
    },
    {
      "metadata": {
        "id": "s0tiX2gdRe-S",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "plt.figure(figsize=(15, 6))\n",
        "plt.subplot(1, 2, 1)\n",
        "plt.scatter(calibration_data[\"predictions\"], calibration_data[\"targets\"])"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "kMQD0Uq3RqTX",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " \u6821\u51c6\u6570\u636e\u663e\u793a\uff0c\u5927\u591a\u6570\u6563\u70b9\u4e0e\u4e00\u6761\u7ebf\u5bf9\u9f50\u3002\u8fd9\u6761\u7ebf\u51e0\u4e4e\u662f\u5782\u76f4\u7684\uff0c\u6211\u4eec\u7a0d\u540e\u518d\u8bb2\u89e3\u3002\u73b0\u5728\uff0c\u6211\u4eec\u91cd\u70b9\u5173\u6ce8\u504f\u79bb\u8fd9\u6761\u7ebf\u7684\u70b9\u3002\u6211\u4eec\u6ce8\u610f\u5230\u8fd9\u4e9b\u70b9\u7684\u6570\u91cf\u76f8\u5bf9\u8f83\u5c11\u3002\n",
        "\n",
        "\u5982\u679c\u6211\u4eec\u7ed8\u5236 `rooms_per_person` \u7684\u76f4\u65b9\u56fe\uff0c\u5219\u4f1a\u53d1\u73b0\u6211\u4eec\u7684\u8f93\u5165\u6570\u636e\u4e2d\u6709\u5c11\u91cf\u79bb\u7fa4\u503c\uff1a"
      ]
    },
    {
      "metadata": {
        "id": "POTM8C_ER1Oc",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "plt.subplot(1, 2, 2)\n",
        "_ = california_housing_dataframe[\"rooms_per_person\"].hist()"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "9l0KYpBQu8ed",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " ## \u4efb\u52a1 3\uff1a\u622a\u53d6\u79bb\u7fa4\u503c\n",
        "\n",
        "\u770b\u770b\u60a8\u80fd\u5426\u901a\u8fc7\u5c06 `rooms_per_person` \u7684\u79bb\u7fa4\u503c\u8bbe\u7f6e\u4e3a\u76f8\u5bf9\u5408\u7406\u7684\u6700\u5c0f\u503c\u6216\u6700\u5927\u503c\u6765\u8fdb\u4e00\u6b65\u6539\u8fdb\u6a21\u578b\u62df\u5408\u60c5\u51b5\u3002\n",
        "\n",
        "\u4ee5\u4e0b\u662f\u4e00\u4e2a\u5982\u4f55\u5c06\u51fd\u6570\u5e94\u7528\u4e8e Pandas `Series` \u7684\u7b80\u5355\u793a\u4f8b\uff0c\u4f9b\u60a8\u53c2\u8003\uff1a\n",
        "\n",
        "    clipped_feature = my_dataframe[\"my_feature_name\"].apply(lambda x: max(x, 0))\n",
        "\n",
        "\u4e0a\u8ff0 `clipped_feature` \u6ca1\u6709\u5c0f\u4e8e `0` \u7684\u503c\u3002"
      ]
    },
    {
      "metadata": {
        "id": "rGxjRoYlHbHC",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "# YOUR CODE HERE"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "WvgxW0bUSC-c",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " ### \u89e3\u51b3\u65b9\u6848\n",
        "\n",
        "\u70b9\u51fb\u4e0b\u65b9\u5373\u53ef\u67e5\u770b\u89e3\u51b3\u65b9\u6848\u3002"
      ]
    },
    {
      "metadata": {
        "id": "8YGNjXPaSMPV",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " \u6211\u4eec\u5728\u4efb\u52a1 2 \u4e2d\u521b\u5efa\u7684\u76f4\u65b9\u56fe\u663e\u793a\uff0c\u5927\u591a\u6570\u503c\u90fd\u5c0f\u4e8e `5`\u3002\u6211\u4eec\u5c06 `rooms_per_person` \u7684\u503c\u622a\u53d6\u4e3a 5\uff0c\u7136\u540e\u7ed8\u5236\u76f4\u65b9\u56fe\u4ee5\u518d\u6b21\u68c0\u67e5\u7ed3\u679c\u3002"
      ]
    },
    {
      "metadata": {
        "id": "9YyARz6gSR7Q",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "california_housing_dataframe[\"rooms_per_person\"] = (\n",
        "    california_housing_dataframe[\"rooms_per_person\"]).apply(lambda x: min(x, 5))\n",
        "\n",
        "_ = california_housing_dataframe[\"rooms_per_person\"].hist()"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "vO0e1p_aSgKA",
        "colab_type": "text"
      },
      "cell_type": "markdown",
      "source": [
        " \u4e3a\u4e86\u9a8c\u8bc1\u622a\u53d6\u662f\u5426\u6709\u6548\uff0c\u6211\u4eec\u518d\u8bad\u7ec3\u4e00\u6b21\u6a21\u578b\uff0c\u5e76\u518d\u6b21\u8f93\u51fa\u6821\u51c6\u6570\u636e\uff1a"
      ]
    },
    {
      "metadata": {
        "id": "ZgSP2HKfSoOH",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "calibration_data = train_model(\n",
        "    learning_rate=0.05,\n",
        "    steps=500,\n",
        "    batch_size=5,\n",
        "    input_feature=\"rooms_per_person\")"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    },
    {
      "metadata": {
        "id": "gySE-UgfSony",
        "colab_type": "code",
        "colab": {
          "autoexec": {
            "startup": false,
            "wait_interval": 0
          }
        }
      },
      "source": [
        "_ = plt.scatter(calibration_data[\"predictions\"], calibration_data[\"targets\"])"
      ],
      "cell_type": "code",
      "execution_count": 0,
      "outputs": []
    }
  ]
}
